import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import requests
import json

12wk-1: ???
plotly
1. 강의영상
2. Imports
# Route DataFrame.plot through plotly and use the white template by default.
pd.options.plotting.backend = "plotly"
pio.templates.default = "plotly_white"

# Download the county GeoJSON from the plotly datasets repository.
# The timeout keeps a stalled connection from hanging the session, and
# raise_for_status() reports an HTTP error directly instead of letting it
# show up later as a JSON parse failure.
_geojson_response = requests.get(
    'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json',
    timeout=30,
)
_geojson_response.raise_for_status()
us_dict = json.loads(_geojson_response.text)

# A shallow copy is enough: only the top-level 'features' entry is replaced,
# so us_dict keeps its complete feature list.
newyork_dict = us_dict.copy()
newyork_dict['features'] = [
    feature
    for feature in us_dict['features']
    if "New York" in feature['properties']['NAME']
]

3.
# Load the taxi trips and keep every 100th row, renumbering the index from 0.
df = pd.read_csv("NYCTaxi.csv").iloc[::100].reset_index(drop=True)
df.columns
Index(['id', 'vendor_id', 'pickup_datetime', 'dropoff_datetime',
       'passenger_count', 'pickup_longitude', 'pickup_latitude',
       'dropoff_longitude', 'dropoff_latitude', 'store_and_fwd_flag',
       'trip_duration'],
      dtype='object')
df

|  | id | vendor_id | pickup_datetime | dropoff_datetime | passenger_count | pickup_longitude | pickup_latitude | dropoff_longitude | dropoff_latitude | store_and_fwd_flag | trip_duration |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | id2875421 | 2 | 2016-03-14 17:24:55 | 2016-03-14 17:32:30 | 1 | -73.982155 | 40.767937 | -73.964630 | 40.765602 | N | 455 |
| 1 | id3194108 | 1 | 2016-06-01 11:48:41 | 2016-06-01 12:19:07 | 1 | -74.005028 | 40.746452 | -73.972008 | 40.745781 | N | 1826 |
| 2 | id3564028 | 1 | 2016-01-02 01:16:42 | 2016-01-02 01:19:56 | 1 | -73.954132 | 40.774784 | -73.947418 | 40.779633 | N | 194 |
| 3 | id1660823 | 2 | 2016-03-01 06:40:18 | 2016-03-01 07:01:37 | 5 | -73.982140 | 40.775326 | -74.009850 | 40.721699 | N | 1279 |
| 4 | id1575277 | 2 | 2016-06-11 16:59:15 | 2016-06-11 17:33:27 | 1 | -73.999229 | 40.722881 | -73.982880 | 40.778297 | N | 2052 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14582 | id3647353 | 1 | 2016-05-16 22:12:09 | 2016-05-16 22:27:46 | 1 | -73.990219 | 40.737076 | -73.986748 | 40.702194 | N | 937 |
| 14583 | id2064944 | 1 | 2016-05-23 08:04:35 | 2016-05-23 08:19:20 | 1 | -73.987068 | 40.730728 | -73.974983 | 40.751331 | N | 885 |
| 14584 | id3286731 | 2 | 2016-05-31 16:56:13 | 2016-05-31 17:38:44 | 1 | -73.863541 | 40.769711 | -73.994644 | 40.750435 | N | 2551 |
| 14585 | id3453691 | 2 | 2016-03-07 18:11:54 | 2016-03-07 18:29:09 | 1 | -74.006531 | 40.738232 | -73.985970 | 40.726978 | N | 1035 |
| 14586 | id0995846 | 2 | 2016-05-09 17:26:56 | 2016-05-09 18:30:37 | 2 | -73.789543 | 40.647099 | -73.960320 | 40.798180 | N | 3821 |
14587 rows × 11 columns
# Scatter map of pickup locations: one small, semi-transparent marker per trip.
fig = px.scatter_mapbox(
    data_frame=df,
    lat='pickup_latitude',
    lon='pickup_longitude',
    opacity=0.3,
    center={'lat': 40.7322, 'lon': -73.9052},
    # --- map layout ---
    mapbox_style='carto-positron',
    zoom=10,
    width=800,
    height=600,
)
fig.update_traces(marker={'size': 2})
fig.show(config={'scrollZoom': False})

# Density map of the same pickup locations; no `z` weighting column is passed.
fig = px.density_mapbox(
    data_frame=df,
    lat='pickup_latitude',
    lon='pickup_longitude',
    center={'lat': 40.7322, 'lon': -73.9052},
    radius=1,
    # --- map layout ---
    mapbox_style='carto-positron',
    zoom=10,
    width=800,
    height=600,
)
fig.show(config={'scrollZoom': False})

# Density map again, now with a larger radius and `passenger_count` supplied
# as the per-point `z` value.
fig = px.density_mapbox(
    data_frame=df,
    lat='pickup_latitude',
    lon='pickup_longitude',
    center={'lat': 40.7322, 'lon': -73.9052},
    radius=2,
    z='passenger_count',
    # --- map layout ---
    mapbox_style='carto-positron',
    zoom=10,
    width=800,
    height=600,
)
fig.show(config={'scrollZoom': False})

df.assign(log_trip_duration = lambda df: np.log(df.trip_duration))

|  | id | vendor_id | pickup_datetime | dropoff_datetime | passenger_count | pickup_longitude | pickup_latitude | dropoff_longitude | dropoff_latitude | store_and_fwd_flag | trip_duration | log_trip_duration |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | id2875421 | 2 | 2016-03-14 17:24:55 | 2016-03-14 17:32:30 | 1 | -73.982155 | 40.767937 | -73.964630 | 40.765602 | N | 455 | 6.120297 |
| 1 | id3194108 | 1 | 2016-06-01 11:48:41 | 2016-06-01 12:19:07 | 1 | -74.005028 | 40.746452 | -73.972008 | 40.745781 | N | 1826 | 7.509883 |
| 2 | id3564028 | 1 | 2016-01-02 01:16:42 | 2016-01-02 01:19:56 | 1 | -73.954132 | 40.774784 | -73.947418 | 40.779633 | N | 194 | 5.267858 |
| 3 | id1660823 | 2 | 2016-03-01 06:40:18 | 2016-03-01 07:01:37 | 5 | -73.982140 | 40.775326 | -74.009850 | 40.721699 | N | 1279 | 7.153834 |
| 4 | id1575277 | 2 | 2016-06-11 16:59:15 | 2016-06-11 17:33:27 | 1 | -73.999229 | 40.722881 | -73.982880 | 40.778297 | N | 2052 | 7.626570 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14582 | id3647353 | 1 | 2016-05-16 22:12:09 | 2016-05-16 22:27:46 | 1 | -73.990219 | 40.737076 | -73.986748 | 40.702194 | N | 937 | 6.842683 |
| 14583 | id2064944 | 1 | 2016-05-23 08:04:35 | 2016-05-23 08:19:20 | 1 | -73.987068 | 40.730728 | -73.974983 | 40.751331 | N | 885 | 6.785588 |
| 14584 | id3286731 | 2 | 2016-05-31 16:56:13 | 2016-05-31 17:38:44 | 1 | -73.863541 | 40.769711 | -73.994644 | 40.750435 | N | 2551 | 7.844241 |
| 14585 | id3453691 | 2 | 2016-03-07 18:11:54 | 2016-03-07 18:29:09 | 1 | -74.006531 | 40.738232 | -73.985970 | 40.726978 | N | 1035 | 6.942157 |
| 14586 | id0995846 | 2 | 2016-05-09 17:26:56 | 2016-05-09 18:30:37 | 2 | -73.789543 | 40.647099 | -73.960320 | 40.798180 | N | 3821 | 8.248267 |
14587 rows × 12 columns
# Density map of pickup locations in which each point carries the natural log
# of its trip duration as the `z` value.
fig = px.density_mapbox(
    data_frame=df.assign(log_trip_duration=np.log(df['trip_duration'])),
    lat='pickup_latitude',
    lon='pickup_longitude',
    center={'lat': 40.7322, 'lon': -73.9052},
    radius=1.5,
    z='log_trip_duration',
    # --- map layout ---
    mapbox_style='carto-positron',
    zoom=10,
    width=800,
    height=600,
)
fig.show(config={'scrollZoom': False})

df.assign(alone = df.passenger_count == 1).assign(vendor_id = lambda df: df.vendor_id.astype(str))

|  | id | vendor_id | pickup_datetime | dropoff_datetime | passenger_count | pickup_longitude | pickup_latitude | dropoff_longitude | dropoff_latitude | store_and_fwd_flag | trip_duration | alone |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | id2875421 | 2 | 2016-03-14 17:24:55 | 2016-03-14 17:32:30 | 1 | -73.982155 | 40.767937 | -73.964630 | 40.765602 | N | 455 | True |
| 1 | id3194108 | 1 | 2016-06-01 11:48:41 | 2016-06-01 12:19:07 | 1 | -74.005028 | 40.746452 | -73.972008 | 40.745781 | N | 1826 | True |
| 2 | id3564028 | 1 | 2016-01-02 01:16:42 | 2016-01-02 01:19:56 | 1 | -73.954132 | 40.774784 | -73.947418 | 40.779633 | N | 194 | True |
| 3 | id1660823 | 2 | 2016-03-01 06:40:18 | 2016-03-01 07:01:37 | 5 | -73.982140 | 40.775326 | -74.009850 | 40.721699 | N | 1279 | False |
| 4 | id1575277 | 2 | 2016-06-11 16:59:15 | 2016-06-11 17:33:27 | 1 | -73.999229 | 40.722881 | -73.982880 | 40.778297 | N | 2052 | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 14582 | id3647353 | 1 | 2016-05-16 22:12:09 | 2016-05-16 22:27:46 | 1 | -73.990219 | 40.737076 | -73.986748 | 40.702194 | N | 937 | True |
| 14583 | id2064944 | 1 | 2016-05-23 08:04:35 | 2016-05-23 08:19:20 | 1 | -73.987068 | 40.730728 | -73.974983 | 40.751331 | N | 885 | True |
| 14584 | id3286731 | 2 | 2016-05-31 16:56:13 | 2016-05-31 17:38:44 | 1 | -73.863541 | 40.769711 | -73.994644 | 40.750435 | N | 2551 | True |
| 14585 | id3453691 | 2 | 2016-03-07 18:11:54 | 2016-03-07 18:29:09 | 1 | -74.006531 | 40.738232 | -73.985970 | 40.726978 | N | 1035 | True |
| 14586 | id0995846 | 2 | 2016-05-09 17:26:56 | 2016-05-09 18:30:37 | 2 | -73.789543 | 40.647099 | -73.960320 | 40.798180 | N | 3821 | False |
14587 rows × 12 columns
# Pickup scatter map coloured by vendor. vendor_id is cast to str before being
# used as the colour column (presumably so plotly colours by category rather
# than on a continuous scale). `alone` is added to the frame as well but is
# not referenced by this figure.
fig = px.scatter_mapbox(
    data_frame=df.assign(
        alone=df['passenger_count'] == 1,
        vendor_id=df['vendor_id'].astype(str),
    ),
    lat='pickup_latitude',
    lon='pickup_longitude',
    opacity=0.3,
    center={'lat': 40.7322, 'lon': -73.9052},
    color='vendor_id',
    # --- map layout ---
    mapbox_style='carto-positron',
    zoom=10,
    width=800,
    height=600,
)
fig.update_traces(marker={'size': 2})
fig.show(config={'scrollZoom': False})
# String-based way to pull the hour out of pickup_datetime:
# df.pickup_datetime.str.split(' ').str[-1].str.split(':').str[0].apply(int)
# Per-row way to pull the hour out of pickup_datetime:
# df.pickup_datetime.apply(pd.to_datetime).dt.hour

# Frame used by the animated maps, sorted by pickup hour (presumably so the
# animation frames come out in hour order).
tidydata = df.assign(
    alone=df.passenger_count == 1,
    # Parse the whole column in one call instead of once per row.
    hour=pd.to_datetime(df.pickup_datetime).dt.hour,
    vendor_id=df.vendor_id.astype(str),
    # NOTE: despite its name this column is a boolean flag (log duration > 8),
    # not the log duration itself.
    log_trip_duration=np.log(df.trip_duration) > 8,
).sort_values(by='hour')

# Animated pickup map: one frame per hour, coloured by vendor, with marker
# size driven by passenger count.
fig = px.scatter_mapbox(
    data_frame=tidydata,
    lat='pickup_latitude',
    lon='pickup_longitude',
    center={'lat': 40.7322, 'lon': -73.9052},
    color='vendor_id',
    size='passenger_count',
    size_max=5,
    animation_frame='hour',
    # --- map layout ---
    mapbox_style='carto-positron',
    zoom=10,
    width=800,
    height=600,
)
fig.show(config={'scrollZoom': False})

# Mean passenger count per vendor, drawn as a bar chart through the plotly
# plotting backend.
(
    tidydata
    .groupby('vendor_id').agg({'passenger_count': 'mean'})
    .reset_index()
    .plot.bar(x='vendor_id', y='passenger_count', color='vendor_id', text='passenger_count')
)

tidydata.log_trip_duration.min(), tidydata.log_trip_duration.max()  # (False, True)
# Animated pickup map (one frame per pickup hour) coloured by tidydata's
# `log_trip_duration` column. The colour range is pinned to that column's
# overall min and max (presumably so every frame shares one colour scale).
fig = px.scatter_mapbox(
    data_frame=tidydata,
    lat='pickup_latitude',
    lon='pickup_longitude',
    center={'lat': 40.7322, 'lon': -73.9052},
    color='log_trip_duration',
    size='passenger_count',
    size_max=5,
    animation_frame='hour',
    range_color=(
        tidydata['log_trip_duration'].min(),
        tidydata['log_trip_duration'].max(),
    ),
    # --- map layout ---
    mapbox_style='carto-positron',
    zoom=10,
    width=800,
    height=600,
)
fig.show(config={'scrollZoom': False})

df_small = df[::100].reset_index(drop=True)

df_small

|  | id | vendor_id | pickup_datetime | dropoff_datetime | passenger_count | pickup_longitude | pickup_latitude | dropoff_longitude | dropoff_latitude | store_and_fwd_flag | trip_duration |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | id2875421 | 2 | 2016-03-14 17:24:55 | 2016-03-14 17:32:30 | 1 | -73.982155 | 40.767937 | -73.964630 | 40.765602 | N | 455 |
| 1 | id3667993 | 2 | 2016-01-03 04:18:57 | 2016-01-03 04:27:03 | 1 | -73.980522 | 40.730530 | -73.997993 | 40.746220 | N | 486 |
| 2 | id2002463 | 2 | 2016-01-14 12:28:56 | 2016-01-14 12:37:17 | 1 | -73.965652 | 40.768398 | -73.960068 | 40.779308 | N | 501 |
| 3 | id1635353 | 2 | 2016-03-04 23:20:58 | 2016-03-04 23:49:29 | 5 | -73.985092 | 40.759190 | -73.962151 | 40.709850 | N | 1711 |
| 4 | id1850636 | 1 | 2016-02-05 00:21:28 | 2016-02-05 00:52:24 | 1 | -73.994537 | 40.750439 | -74.025719 | 40.631100 | N | 1856 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 141 | id0621879 | 1 | 2016-04-23 09:31:33 | 2016-04-23 09:51:33 | 1 | -73.950783 | 40.743614 | -74.006218 | 40.722729 | N | 1200 |
| 142 | id2587483 | 2 | 2016-03-28 12:59:58 | 2016-03-28 13:08:11 | 2 | -73.953903 | 40.787079 | -73.940842 | 40.792461 | N | 493 |
| 143 | id1030598 | 2 | 2016-03-03 11:44:24 | 2016-03-03 11:49:59 | 1 | -74.005066 | 40.719143 | -74.006065 | 40.735134 | N | 335 |
| 144 | id3094934 | 1 | 2016-03-21 09:53:40 | 2016-03-21 10:22:20 | 1 | -73.986153 | 40.722431 | -73.985977 | 40.762669 | N | 1720 |
| 145 | id0503659 | 2 | 2016-04-19 18:06:09 | 2016-04-19 18:23:09 | 2 | -73.952209 | 40.784500 | -73.966103 | 40.804832 | N | 1020 |
146 rows × 11 columns
def transform(df):
pick_up = df.loc[:,['id','pickup_datetime','pickup_longitude','pickup_latitude']].set_axis(['id','datetime','lon','lat'],axis=1).eval('state = "pickup"')
drop_off = df.loc[:,['id','dropoff_datetime','dropoff_longitude','dropoff_latitude']].set_axis(['id','datetime','lon','lat'],axis=1).eval('state = "dropoff"')
return pd.concat([pick_up,drop_off],axis=0).reset_index(drop=True)pd.concat([transform(df) for i,df in df_small.groupby('id')]).reset_index(drop=True)\
.merge(df_small.drop(['pickup_datetime','pickup_longitude','pickup_latitude','dropoff_datetime','dropoff_longitude','dropoff_latitude'],axis=1))| id | datetime | lon | lat | state | vendor_id | passenger_count | store_and_fwd_flag | trip_duration | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | id0037819 | 2016-05-16 17:42:32 | -73.986420 | 40.756569 | pickup | 2 | 6 | N | 273 |
| 1 | id0037819 | 2016-05-16 17:47:05 | -73.995300 | 40.740059 | dropoff | 2 | 6 | N | 273 |
| 2 | id0049607 | 2016-03-13 18:48:49 | -73.975922 | 40.754192 | pickup | 1 | 2 | N | 439 |
| 3 | id0049607 | 2016-03-13 18:56:08 | -73.988922 | 40.762859 | dropoff | 1 | 2 | N | 439 |
| 4 | id0051866 | 2016-01-04 18:48:12 | -73.962654 | 40.772449 | pickup | 1 | 1 | N | 638 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 287 | id3825370 | 2016-05-08 17:36:48 | -73.979195 | 40.669765 | dropoff | 1 | 4 | N | 2358 |
| 288 | id3888107 | 2016-06-21 18:30:05 | -73.969429 | 40.757469 | pickup | 2 | 1 | N | 878 |
| 289 | id3888107 | 2016-06-21 18:44:43 | -73.982742 | 40.771969 | dropoff | 2 | 1 | N | 878 |
| 290 | id3988208 | 2016-03-01 21:40:13 | -73.948929 | 40.797405 | pickup | 1 | 1 | N | 433 |
| 291 | id3988208 | 2016-03-01 21:47:26 | -73.967438 | 40.789543 | dropoff | 1 | 1 | N | 433 |
292 rows × 9 columns
df2 = pd.concat([transform(df) for i,df in df_small.groupby('id')]).reset_index(drop=True)\
.merge(df_small.drop(['pickup_datetime','pickup_longitude','pickup_latitude','dropoff_datetime','dropoff_longitude','dropoff_latitude'],axis=1))df2| id | datetime | lon | lat | state | vendor_id | passenger_count | store_and_fwd_flag | trip_duration | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | id0037819 | 2016-05-16 17:42:32 | -73.986420 | 40.756569 | pickup | 2 | 6 | N | 273 |
| 1 | id0037819 | 2016-05-16 17:47:05 | -73.995300 | 40.740059 | dropoff | 2 | 6 | N | 273 |
| 2 | id0049607 | 2016-03-13 18:48:49 | -73.975922 | 40.754192 | pickup | 1 | 2 | N | 439 |
| 3 | id0049607 | 2016-03-13 18:56:08 | -73.988922 | 40.762859 | dropoff | 1 | 2 | N | 439 |
| 4 | id0051866 | 2016-01-04 18:48:12 | -73.962654 | 40.772449 | pickup | 1 | 1 | N | 638 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 287 | id3825370 | 2016-05-08 17:36:48 | -73.979195 | 40.669765 | dropoff | 1 | 4 | N | 2358 |
| 288 | id3888107 | 2016-06-21 18:30:05 | -73.969429 | 40.757469 | pickup | 2 | 1 | N | 878 |
| 289 | id3888107 | 2016-06-21 18:44:43 | -73.982742 | 40.771969 | dropoff | 2 | 1 | N | 878 |
| 290 | id3988208 | 2016-03-01 21:40:13 | -73.948929 | 40.797405 | pickup | 1 | 1 | N | 433 |
| 291 | id3988208 | 2016-03-01 21:47:26 | -73.967438 | 40.789543 | dropoff | 1 | 1 | N | 433 |
292 rows × 9 columns
df2.assign(
alone = lambda df: df.passenger_count == 1,
hour = lambda df: df.datetime.apply(pd.to_datetime).dt.hour
)| id | datetime | lon | lat | state | vendor_id | passenger_count | store_and_fwd_flag | trip_duration | alone | hour | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | id0037819 | 2016-05-16 17:42:32 | -73.986420 | 40.756569 | pickup | 2 | 6 | N | 273 | False | 17 |
| 1 | id0037819 | 2016-05-16 17:47:05 | -73.995300 | 40.740059 | dropoff | 2 | 6 | N | 273 | False | 17 |
| 2 | id0049607 | 2016-03-13 18:48:49 | -73.975922 | 40.754192 | pickup | 1 | 2 | N | 439 | False | 18 |
| 3 | id0049607 | 2016-03-13 18:56:08 | -73.988922 | 40.762859 | dropoff | 1 | 2 | N | 439 | False | 18 |
| 4 | id0051866 | 2016-01-04 18:48:12 | -73.962654 | 40.772449 | pickup | 1 | 1 | N | 638 | True | 18 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 287 | id3825370 | 2016-05-08 17:36:48 | -73.979195 | 40.669765 | dropoff | 1 | 4 | N | 2358 | False | 17 |
| 288 | id3888107 | 2016-06-21 18:30:05 | -73.969429 | 40.757469 | pickup | 2 | 1 | N | 878 | True | 18 |
| 289 | id3888107 | 2016-06-21 18:44:43 | -73.982742 | 40.771969 | dropoff | 2 | 1 | N | 878 | True | 18 |
| 290 | id3988208 | 2016-03-01 21:40:13 | -73.948929 | 40.797405 | pickup | 1 | 1 | N | 433 | True | 21 |
| 291 | id3988208 | 2016-03-01 21:47:26 | -73.967438 | 40.789543 | dropoff | 1 | 1 | N | 433 | True | 21 |
292 rows × 11 columns
# Build the plotting frame once and reuse it for every px call below.
# `alone` flags single-passenger trips; `hour` is the hour of each endpoint's
# timestamp, and the rows are ordered by it.
route_data = df2.assign(
    alone=lambda df: df.passenger_count == 1,
    hour=lambda df: pd.to_datetime(df.datetime).dt.hour,
).sort_values('hour')

# One line per trip (line_group='id') joining its pickup and dropoff points,
# coloured by whether the trip had a single passenger.
fig = px.line_mapbox(
    route_data,
    lat="lat",
    lon="lon",
    center={'lat': 40.7322, 'lon': -73.9052},
    line_group='id',
    color='alone',
    hover_data='hour',
    # --- map layout ---
    mapbox_style='carto-positron',
    zoom=10,
    width=800,
    height=600,
)

# Overlay the trip endpoints as markers sized by trip duration. The marker
# figure holds one trace per distinct `alone` value, so add whatever traces
# exist instead of indexing data[0] and data[1], which raises IndexError when
# only one value is present.
endpoint_markers = px.scatter_mapbox(
    data_frame=route_data,
    lat='lat',
    lon='lon',
    center={'lat': 40.7322, 'lon': -73.9052},
    color='alone',
    size='trip_duration',
    size_max=10,
)
for marker_trace in endpoint_markers.data:
    fig.add_trace(marker_trace)

# Thin the connecting lines and make every trace slightly transparent.
fig.update_traces(
    line={'width': 1},
    opacity=0.8,
)
fig.show(config={'scrollZoom': False})